import keras as keras

(x_train, y_train), (x_valid, y_valid) = keras.datasets.mnist.load_data()
assert x_train.shape == (60000, 28, 28)
assert x_valid.shape == (10000, 28, 28)
assert y_train.shape == (60000,)
assert y_valid.shape == (10000,)

# step1: use sequential
model = keras.models.Sequential()

# step2: add layer
model.add(keras.layers.Flatten(input_shape=(x_train.shape[1], x_train.shape[2])))
model.add(keras.layers.Dense(units=784, activation="relu", input_dim=784))
model.add(keras.layers.Dense(units=10, activation="softmax"))

# step3: compile model
# optimizer = keras.optimizers.SGD(learning_rate=0.001)
# optimizer = keras.optimizers.Adam(learning_rate=0.001)
# optimizer = keras.optimizers.RMSprop(learning_rate=0.001)
optimizer = keras.optimizers.Adamax(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()
model.fit(x_train, y_train, batch_size=100, epochs=5)
